Stellar Systems in blue, Confirmed Exoplanetary systems in red
Introduction
¶In [8]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
#jupyter nbconvert --to html model.ipynb
In [ ]:
# Data processing: load the exoplanet table from CSV into a DataFrame.
exoDf = pd.read_csv('ExoplanetData.csv')

# Reduce hd_name to its first run of digits, which drops the redundant "HD"
# prefix and any trailing component letter such as "A". Entries without
# digits become missing values.
# The pattern is a raw string so that "\d" reaches the regex engine intact
# instead of being parsed as an (invalid) string escape sequence.
hd_digits = exoDf['hd_name'].str.extract(r'(\d+)', expand=False)

# Convert to the nullable Int64 dtype so missing catalogue numbers stay <NA>.
# No fillna step is needed: to_numeric only yields NaN where the extracted
# value was already missing, so filling from that same column changes nothing.
exoDf['hd_name'] = pd.to_numeric(hd_digits, errors='coerce').astype('Int64')
In [ ]:
# Load the HD star catalogue from its semicolon-delimited CSV into a DataFrame.
hdDf = pd.read_csv('HDCatalogue.csv', sep=';')
In [5]:
# The raw table currently has 121 columns; reduce it to the usable ones.
# Keep the names/identifiers plus anything that can feed a regression, and
# leave out flags, discovery methods, etc. The goal is a model that predicts
# the number of planets from exoDf (the positions also allow a sky plot of
# all observed exoplanets).
columns_to_keep = [
    'pl_name', 'hostname', 'hd_name', 'hip_name', 'tic_id',
    'gaia_id', 'sy_snum', 'sy_pnum', 'sy_mnum',
    'st_spectype', 'st_teff', 'st_rad', 'st_mass',
    'st_logg', 'st_age', 'st_dens', 'ra', 'dec', 'sy_dist',
]
# Take an explicit copy so the assignments below write to an independent
# frame instead of a slice of the original (avoids SettingWithCopyWarning).
exoDf = exoDf[columns_to_keep].copy()

# Only a very small amount of data is missing, so mean-impute the numeric
# columns. fillna with a Series of per-column means fills each column with
# its own mean in a single step.
columns_to_impute = [
    'st_age', 'st_mass', 'st_dens', 'st_rad', 'st_teff', 'st_logg', 'sy_dist',
]
column_means = exoDf[columns_to_impute].mean()
exoDf[columns_to_impute] = exoDf[columns_to_impute].fillna(column_means)
In [6]:
# Plot every catalogue star and every exoplanet host on an all-sky (Aitoff) projection.
def deg_from_neg_pi_to_pi(deg):
    """Convert degrees to radians, then shift the result down by pi.

    An angle given in the range 0..360 degrees therefore comes back as a
    radian value in the range -pi..pi.
    """
    radians = np.pi * deg / 180
    return radians - np.pi
def deg_to_rad_90(deg):
    """Convert degrees to radians with no offset.

    An angle given in the range -90..90 degrees comes back as a radian
    value in the range -pi/2..pi/2.
    """
    radians = np.pi * deg / 180
    return radians
# Convert both catalogues' coordinates from degrees to radians. The helpers
# are plain arithmetic, so each one is applied to the whole column at once
# rather than element by element through Series.apply.
raHd = deg_from_neg_pi_to_pi(hdDf['_RAJ2000'])
raExo = deg_from_neg_pi_to_pi(exoDf['ra'])
decHd = deg_to_rad_90(hdDf['_DEJ2000'])
decExo = deg_to_rad_90(exoDf['dec'])

# Use a high DPI for better resolution.
fig = plt.figure(figsize=(15, 10), dpi=500)
# Create the axes on this figure explicitly instead of relying on pyplot's
# notion of the "current" figure.
ax = fig.add_subplot(111, projection='aitoff')

# Initial scatter layers: HD catalogue stars in blue, exoplanet hosts in red.
# The artists are kept so their positions can be updated later.
sc_hd = ax.scatter(raHd, decHd, s=1, c='blue', alpha=0.1)
sc_exo = ax.scatter(raExo, decExo, s=1, c='red', alpha=0.3)

ax.set_xlabel('Right Ascension')
ax.set_ylabel('Declination')
ax.set_title('Systems with Confirmed Exoplanets')
ax.grid(True)
ax.set_facecolor('white')
def update(frame):
    """Rotate both scatter layers in right ascension for one animation frame.

    Args:
        frame: Rotation to apply for this frame, in degrees.

    Returns:
        A tuple ``(sc_hd, sc_exo)`` of the two scatter artists after their
        offsets have been updated.
    """
    # No ``global`` statement is needed: the module-level arrays and artists
    # are only read or mutated in place, never rebound.

    # Rotation for this frame in radians, reduced to one full turn.
    shift_radians = np.deg2rad(frame) % (2 * np.pi)

    # Shift each right ascension and wrap it back into [-pi, pi).
    new_ra_hd = ((raHd + shift_radians + np.pi) % (2 * np.pi)) - np.pi
    new_ra_exo = ((raExo + shift_radians + np.pi) % (2 * np.pi)) - np.pi

    # Move the points; declinations are unchanged by the rotation.
    sc_hd.set_offsets(np.column_stack((new_ra_hd, decHd)))
    sc_exo.set_offsets(np.column_stack((new_ra_exo, decExo)))
    return sc_hd, sc_exo
# Uncomment the two lines below to build the rotating animation (one frame
# every 3 degrees from 0 to 357, driven by update) and save it as a GIF.
#ani = animation.FuncAnimation(fig, update, frames=np.arange(0, 360, 3), interval = 100, blit=True, repeat=True)
#ani.save('stars_animation.gif', writer='ffmpeg', dpi=200)
# Display the figure.
plt.show()